library(sf)
library(readxl)
library(tidyverse)
library(mapview)
library(gtfstools)
library(stringr)
library(gt)
mapviewOptions(fgb = FALSE)Estimating Directional Route Mileage by UZA
UZA Work
We write a function to grab the files from the Census Bureau site and perform light filtering and manipulation.
sf_from_zip <- function(URL) {
# based on:
# https://stackoverflow.com/questions/59740419/unzipping-and-reading-shape-file-in-r-without-rgdal-installed
# Load local temp files
temp_0 <- tempfile()
temp_1 <- tempfile()
# download the zip, put it in the first temp directory
download.file(URL, temp_0)
# Unzip and place in temp_1
unzip(zipfile = temp_0, exdir = temp_1)
# Read the shapefile.
geom_sf <- sf::read_sf(temp_1)
out <- geom_sf
}# Record the URLs.
UZA20_URL <- "https://www2.census.gov/geo/tiger/TIGER2020/UAC/tl_2020_us_uac20.zip"
# Obtain the data.
UZA20 <- sf_from_zip(UZA20_URL)
# Read data from population file:
# https://www.transit.dot.gov/ntd/2020-census-changes-uzapopulation
UZA20_pop <- read_xlsx("../data/base/UZA_CHANGES_1990-2020_2_2.xlsx",
sheet = "UZA 2020 Master",
col_types = "text") |>
janitor::clean_names() |>
mutate(x2020_uace = str_pad(x2020_uace, side = "left", pad = "0", 5))
# Transform to state coordinate system.
UZA20 <- UZA20 |> sf::st_transform(26986)
NE <- c("MA", "CT", "RI", "VT", "NH", "ME")
NE_collapse <- paste0(NE,collapse = "|")
# There are only Urban Areas in 2020.
UZA20_NE <- UZA20 |>
filter(str_detect(NAME20, NE_collapse)) |>
left_join(UZA20_pop |> select(x2020_uace, x2020_population), by = c("GEOID20" = "x2020_uace")) |>
mutate(UZA_type = if_else(!is.na(x2020_population),
"Pop: GTE 50k",
"Pop: LT 50k"))
saveRDS(UZA20_NE, "../data/processed/UZA20_NE.rds")CR Line Work
We start by downloading the last Summer 2023 GTFS file from the MBTA’s GTFS archive. This file contains information for the CapeFlyer and has the recent Foxboro Branch. We remove the segments running from the Providence Line to Foxboro as they are not typical service.
# This is the last Summer 2022 GTFS schedule.
# We chose this because it contains the CapeFlyer if we need it.
gtfs_summ23 <- gtfstools::read_gtfs("https://cdn.mbtace.com/archive/20230810.zip")
# Convert shapes to MA system.
gtfs_summ23_shp <- convert_shapes_to_sf(gtfs_summ23) |>
st_transform(26986)
# Attach them to our route_ids so we can have hints about what each shape is.
gtfs_summ23_shp <-
inner_join(gtfs_summ23_shp,
gtfs_summ23$trips |> distinct(route_id, shape_id),
by = "shape_id")
gtfs_summ23_stp <- gtfs_summ23 |> convert_stops_to_sf()
gtfs_summ23_stp <- gtfs_summ23_stp |> filter(vehicle_type == 2) |>
distinct(stop_name, .keep_all = TRUE)We need to filter the shapes for commuter rail lines. We can view the shapes to confirm that this matches the system as we know it.
gtfs_summ23_shp_filt <- gtfs_summ23_shp |>
filter(str_detect(route_id, "CR-") | str_detect(route_id, "Cape")) |> filter(!shape_id %in% c("canonical-SouFoxProv", "canonical-ProvidenceToSouthStationViaFoxboro", "ProvidenceToSouthStationViaFoxboro")) # remove fox from; south not regular service
gtfs_summ23_shp_filt |> mapview(zcol = "route_id", color = viridis::turbo)The shapes representing some of the same tracks are very close but not exactly the same. If not addressed, this would cause an inflation in directional route miles. We can buffer the routes slightly to coalesce similar track segments.
Zoom into a station like Readville to see the effects of the buffer on the final result. You’ll notice a “cupping” where we would really want something resembling an “X” through the junction.
# Buffer out by 20m, then in by -19 meters -1*(20-1). A visual inspection suggested that 20m was enough distance to capture nearby alignments.
d <- 20
gtfs_summ23_shp_filt_buff <- gtfs_summ23_shp_filt |>
st_buffer(d, endCapStyle = "SQUARE", joinStyle = "ROUND") |>
summarize() |>
st_buffer(-1 * (d-1), endCapStyle = "SQUARE", joinStyle = "ROUND")
mapview(gtfs_summ23_shp_filt_buff) +
mapview(gtfs_summ23_shp_filt,
zcol = "route_id",
color = viridis::turbo)We take the outside line of the buffer which gives us an approximation of the length of the routes. In this case, because we are concerned with directional route miles, each line can be thought of as one direction of the route, meaning we do not have to divide by two to get the length of the centerline.
gtfs_summ23_shp_line <- gtfs_summ23_shp_filt_buff |> st_cast(to = "MULTILINESTRING")saveRDS(gtfs_summ23_shp_line,
"../data/processed/gtfs_summ23_shp_line.rds")Perform Intersections
Read in the pre-generated datasets.
# Read in trains
trains_pass <- readRDS("../data/processed/gtfs_summ23_shp_line.rds")
# Read in UZA
UZA20_NE <- readRDS("../data/processed/UZA20_NE.rds")
# Create labels for helpful hovering.
labs20 <- UZA20_NE$NAME20We perform an intersection over each of the datasets.
trains_pass_UZA20 <- st_intersection(trains_pass, UZA20_NE)We map the datasets, showing only UZAs in MA.